---
title: "Univariate Stock Predictions: LSTM, ARIMA, and Prophet"
subtitle: "INFO 523 - Final Project"
author:
- name: "Matt Osterhoudt"
affiliations:
- name: "College of Information Science, University of Arizona"
description: "Univariate closing-price forecasting for SPY, SWPPX, and MSFT using LSTM, ARIMA, and Prophet models"
format:
html:
code-tools: true
code-overflow: wrap
embed-resources: true
editor: visual
execute:
warning: false
echo: false
jupyter: python3
---
## Abstract
## Introduction/Question
## Approach
## Code & Visual Analysis
```{python}
#| label: load-packages
#| include: false
#| echo: false
#| warning: false
#| message: false
# Load packages here
import pandas as pd
import numpy as np
import seaborn as sns
import yfinance as yf
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima.model import ARIMA
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import load_model
import warnings
warnings.filterwarnings('ignore')
from prophet import Prophet
import statsmodels.api as sm
import time
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
# Download daily price history for the three tickers, restrict each to the
# 2015-2024 study window, and cache the slices as CSVs under data/.
from pathlib import Path
Path("data").mkdir(exist_ok = True)  # to_csv raises OSError if the folder is missing
# SPY S&P 500 ETF Stock
ticker1 = yf.Ticker("SPY")
data1 = ticker1.history(period = "max")
data1 = data1.loc["2015-01-01":"2024-12-31"]
data1.to_csv("data/spy_2015_2024.csv")
spy_close = data1[["Close"]]  # univariate series: keep only the closing price
# Schwab Index Stock
ticker2 = yf.Ticker("SWPPX")
data2 = ticker2.history(period = "max")
data2 = data2.loc["2015-01-01":"2024-12-31"]
data2.to_csv("data/swppx_2015_2024.csv")
swppx_close = data2[["Close"]]
# Microsoft Stock. I wanted to include a consistent large cap stock
ticker3 = yf.Ticker("MSFT")
data3 = ticker3.history(period = "max")
data3 = data3.loc["2015-01-01":"2024-12-31"]
data3.to_csv("data/msft_2015_2024.csv")
data3.info()
msft_close = data3[["Close"]]
```
```{python}
#| echo: false
#| warning: false
#| message: false
warnings.filterwarnings('ignore')
# ===============================
# MSFT ARIMA
# ===============================
# Setting a function for easier titling
def add_title(ax, title):
    """Attach *title* to the given matplotlib axes (shorthand used by every plot below)."""
    ax.set_title(title)
# Plot the raw closing-price series.
fig, ax1 = plt.subplots(figsize=(8, 5))
msft_close.plot(ax = ax1)
add_title(ax1, "MSFT Closing Price")
# Chronological 80/20 train/test split (no shuffling for time series).
n = int(len(msft_close) * 0.8)
train_msft = msft_close.iloc[:n]
test_msft = msft_close.iloc[n:]
test_msft.info()
# Manually check for stationarity
# ACF and PACF Test
fig, ax2 = plt.subplots(figsize = (8, 5))
plot_acf(train_msft, ax = ax2)
add_title(ax2, "ACF of MSFT Closing Price")
fig, ax3 = plt.subplots(figsize = (8, 5))
plot_pacf(train_msft, ax = ax3)
add_title(ax3, "PACF of MSFT Closing Price")
# ADF Test: a large p-value means we cannot reject non-stationarity.
msft_adf_test = adfuller(train_msft["Close"])
print(f'p-value pre-difference: {msft_adf_test[1]}')
# Implement first-order differencing to remove the trend.
train_msft_diff = train_msft.diff().dropna()
fig, ax4 = plt.subplots(figsize = (8, 5))
train_msft_diff.plot(ax = ax4)
add_title(ax4, "Differenced MSFT Closing Price")
# PACF/ACF Differenced Plots
fig, ax5 = plt.subplots(figsize=(8, 5))
plot_acf(train_msft_diff, ax = ax5)
add_title(ax5, "ACF of Differenced MSFT")
fig, ax6 = plt.subplots(figsize=(8, 5))
plot_pacf(train_msft_diff, ax = ax6)
add_title(ax6, "PACF of Differenced MSFT")
# Data is now stationary after differencing, based on P-value
msft_adf_test_diff = adfuller(train_msft_diff["Close"])
print(f'p-value post-difference: {msft_adf_test_diff[1]}')
# ARIMA(9,1,9) with a linear time trend; d=1 matches the single differencing above.
msft_arima_model = ARIMA(train_msft["Close"], order = (9, 1, 9), trend = 't')
msft_arima_result = msft_arima_model.fit()
print(msft_arima_result.summary())
# NOTE(review): msft_forecast_test is never used later — pred_msft below is
# what gets plotted and evaluated; confirm this line is still needed.
msft_forecast_test = msft_arima_result.forecast(len(test_msft))
# Prediction over the test horizon; dynamic = True so values beyond the
# sample are built from the model's own predictions, not observed data.
pred_msft = msft_arima_result.predict(
    start = len(train_msft),
    end = len(train_msft) + len(test_msft) - 1,
    dynamic = True
)
# Align predicted index to test dates for plotting
pred_msft.index = test_msft.index
# Plot actual vs forecast
fig, ax = plt.subplots(figsize = (8, 5))
# Training data
train_msft["Close"].plot(ax = ax, label = "Train", color="blue")
# Test data
test_msft["Close"].plot(ax = ax, label = "Test", color="green")
# Forecasted values
pred_msft.plot(ax = ax, label = "Prediction", color = "red")
ax.set_title("MSFT ARIMA Forecast vs Actual")
ax.set_xlabel("Date")
ax.legend()
plt.show()
# ===============================
# SPY ARIMA
# ===============================
# Same ARIMA workflow as MSFT above: plot, split, stationarity checks,
# difference once, fit, and forecast the held-out 20%.
fig, ax1 = plt.subplots(figsize = (8, 5))
spy_close.plot(ax = ax1)
add_title(ax1, "SPY Closing Price")
# Chronological 80/20 split.
n = int(len(spy_close) * 0.8)
train_spy = spy_close.iloc[:n]
test_spy = spy_close.iloc[n:]
#test_spy.info()
# Manually check for stationarity
# ACF and PACF Test
fig, ax2 = plt.subplots(figsize = (8, 5))
plot_acf(train_spy, ax = ax2)
add_title(ax2, "ACF of SPY Closing Price")
fig, ax3 = plt.subplots(figsize = (8, 5))
plot_pacf(train_spy, ax = ax3)
add_title(ax3, "PACF of SPY Closing Price")
# ADF Test: a large p-value means we cannot reject non-stationarity.
spy_adf_test = adfuller(train_spy["Close"])
print(f'p-value pre-difference: {spy_adf_test[1]}')
# Implement differencing
train_spy_diff = train_spy.diff().dropna()
fig, ax4 = plt.subplots(figsize = (8, 5))
train_spy_diff.plot(ax = ax4)
add_title(ax4, "Differenced SPY Closing Price")
# PACF/ACF Differenced Plots
fig, ax5 = plt.subplots(figsize = (8, 5))
plot_acf(train_spy_diff, ax = ax5)
add_title(ax5, "ACF of Differenced SPY")
fig, ax6 = plt.subplots(figsize = (8, 5))
plot_pacf(train_spy_diff, ax = ax6)
add_title(ax6, "PACF of Differenced SPY")
# Data is now stationary after differencing, based on P-value
spy_adf_test_diff = adfuller(train_spy_diff["Close"])
print(f'p-value post-difference: {spy_adf_test_diff[1]}')
# ARIMA(9,1,6) with a linear time trend; d=1 matches the single differencing above.
spy_arima_model = ARIMA(train_spy["Close"], order = (9, 1, 6), trend = 't')
spy_arima_result = spy_arima_model.fit()
print(spy_arima_result.summary())
# NOTE(review): spy_forecast_test is never used later — pred_spy below is
# what gets plotted and evaluated; confirm this line is still needed.
spy_forecast_test = spy_arima_result.forecast(len(test_spy))
# Prediction over the test horizon (out-of-sample, hence dynamic).
pred_spy = spy_arima_result.predict(
    start = len(train_spy),
    end = len(train_spy) + len(test_spy) - 1,
    dynamic = True
)
# Align predicted index to test dates for plotting
pred_spy.index = test_spy.index
# Plot actual vs forecast
fig, ax = plt.subplots(figsize=(8, 5))
train_spy["Close"].plot(ax = ax, label = "Train", color = "blue")
test_spy["Close"].plot(ax = ax, label = "Test", color = "green")
pred_spy.plot(ax= ax, label = "Prediction", color = "red")
ax.set_title("SPY ARIMA Forecast vs Actual")
ax.set_xlabel("Date")
ax.legend()
plt.show()
# ===============================
# SWPPX ARIMA
# ===============================
# Same ARIMA workflow as MSFT/SPY above, with a smaller (2,1,2) order.
fig, ax1 = plt.subplots(figsize = (8, 5))
swppx_close.plot(ax = ax1)
add_title(ax1, "SWPPX Closing Price")
# Chronological 80/20 split.
n = int(len(swppx_close) * 0.8)
train_swppx = swppx_close.iloc[:n]
test_swppx = swppx_close.iloc[n:]
#test_swppx.info()
# Manually check for stationarity
# ACF and PACF Test
fig, ax2 = plt.subplots(figsize = (8, 5))
plot_acf(train_swppx, ax = ax2)
add_title(ax2, "ACF of SWPPX Closing Price")
fig, ax3 = plt.subplots(figsize = (8, 5))
plot_pacf(train_swppx, ax = ax3)
add_title(ax3, "PACF of SWPPX Closing Price")
# ADF Test: a large p-value means we cannot reject non-stationarity.
swppx_adf_test = adfuller(train_swppx["Close"])
print(f'p-value pre-difference: {swppx_adf_test[1]}')
# Implement differencing
train_swppx_diff = train_swppx.diff().dropna()
fig, ax4 = plt.subplots(figsize = (8, 5))
train_swppx_diff.plot(ax = ax4)
add_title(ax4, "Differenced SWPPX Closing Price")
# PACF/ACF Differenced Plots
fig, ax5 = plt.subplots(figsize = (8, 5))
plot_acf(train_swppx_diff, ax = ax5)
add_title(ax5, "ACF of Differenced SWPPX")
fig, ax6 = plt.subplots(figsize = (8, 5))
plot_pacf(train_swppx_diff, ax = ax6)
add_title(ax6, "PACF of Differenced SWPPX")
# Data is now stationary after differencing, based on P-value
swppx_adf_test_diff = adfuller(train_swppx_diff["Close"])
print(f'p-value post-difference: {swppx_adf_test_diff[1]}')
# ARIMA(2,1,2) with a linear time trend; d=1 matches the single differencing above.
swppx_arima_model = ARIMA(train_swppx["Close"], order = (2, 1, 2), trend = 't')
swppx_arima_result = swppx_arima_model.fit()
print(swppx_arima_result.summary())
# NOTE(review): swppx_forecast_test is never used later — pred_swppx below is
# what gets plotted and evaluated; confirm this line is still needed.
swppx_forecast_test = swppx_arima_result.forecast(len(test_swppx))
# Prediction over the test horizon (out-of-sample, hence dynamic).
pred_swppx = swppx_arima_result.predict(
    start = len(train_swppx),
    end = len(train_swppx) + len(test_swppx) - 1,
    dynamic = True
)
# Align predicted index to test dates for plotting
pred_swppx.index = test_swppx.index
# Plot actual vs forecast
fig, ax = plt.subplots(figsize = (8, 5))
# Training data
train_swppx["Close"].plot(ax = ax, label = "Train", color = "blue")
# Test data
test_swppx["Close"].plot(ax = ax, label = "Test", color = "green")
# Forecasted values
pred_swppx.plot(ax = ax, label = "Prediction", color = "red")
ax.set_title("SWPPX ARIMA Forecast vs Actual")
ax.set_xlabel("Date")
ax.legend()
plt.show()
```
```{python}
#| echo: false
#| warning: false
#| message: false
warnings.filterwarnings('ignore')
# ===============================
# MSFT LSTM
# ===============================
# Pull the closing prices out as a bare ndarray for the scaler.
msft_close_data = msft_close.values # Taking shape
# This time, I am scaling the data into [0, 1]; the same fitted scaler is
# reused later to invert the model's predictions back to prices.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_msft_close = scaler.fit_transform(msft_close_data)
# Windowing helper shared by all three LSTM pipelines below.
def create_sequences(data, interval = 60):
    """Slice a scaled one-column array into supervised (window, target) pairs.

    Each sample holds `interval` consecutive values from column 0; its target
    is the value immediately after the window. Returns (X, y) as numpy arrays.
    """
    positions = range(interval, len(data))
    windows = [data[stop - interval:stop, 0] for stop in positions]
    targets = [data[stop, 0] for stop in positions]
    return np.array(windows), np.array(targets)
interval = 60 # days of history per sample
X_all, y_all = create_sequences(scaled_msft_close, interval)
# (Removed the no-op X_all.view()/y_all.view() calls: ndarray.view() returns a
# new view object that was immediately discarded, so they did nothing.)
# Usual 80/20 split, kept chronological.
n_train = int(len(X_all) * 0.8)
X_train, X_test = X_all[:n_train], X_all[n_train:]
y_train, y_test = y_all[:n_train], y_all[n_train:]
# LSTM Tensorflow needs input in [samples, timesteps, features] form, so this
# code converts our 2d input into 3d. The final "1" is the features dimension.
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
# Building model with parameters
msft_lstm_model = Sequential()
msft_lstm_model.add(InputLayer(input_shape = (interval, 1)))
# Via online research as to what units to use
msft_lstm_model.add(LSTM(50))
msft_lstm_model.add(Dropout(0.2))
msft_lstm_model.add(Dense(8, "relu"))
msft_lstm_model.add(Dense(1, "linear"))
msft_lstm_model.summary()
# Save the best model seen during training.
msft_cp = ModelCheckpoint("msft_lstm_model/.keras", save_best_only = True)
# MSE loss, with RMSE tracked as an extra metric.
msft_lstm_model.compile(optimizer = "adam", loss = "mean_squared_error", metrics = [RootMeanSquaredError()])
# NOTE(review): the test split doubles as validation data for checkpoint
# selection, so test metrics may be slightly optimistic — confirm acceptable.
msft_lstm_model.fit(X_train, y_train, batch_size = 32, epochs = 20, validation_data = (X_test, y_test), callbacks = [msft_cp], verbose = 0)
# Loads best checkpoint for prediction/plotting.
msft_lstm_model = load_model("msft_lstm_model/.keras")
msft_predictions = msft_lstm_model.predict(X_test)
# Undoing the scaler so values are prices again.
msft_predictions = scaler.inverse_transform(msft_predictions.reshape(-1, 1))
y_test_actual = scaler.inverse_transform(y_test.reshape(-1, 1))
# MSFT LSTM Plot — sample dates are offset by the 60-day window.
msft_train_dates = msft_close.index[interval:n_train + interval]
msft_test_dates = msft_close.index[n_train + interval:]
fig, ax = plt.subplots(figsize = (8,5))
ax.plot(msft_test_dates, y_test_actual, label = "Actual", color = "green")
ax.plot(msft_test_dates, msft_predictions, label = "Predicted", color = "red")
ax.set_title("MSFT LSTM Prediction vs Actual")
ax.set_xlabel("Date")
ax.set_ylabel("Price")
ax.legend()
plt.show()
# ===============================
# SPY LSTM
# ===============================
# SPY: same LSTM pipeline as MSFT above — scale, window, split, train, invert.
spy_close_data = spy_close.values # Taking shape
scaler = MinMaxScaler(feature_range = (0, 1))
scaled_spy_close = scaler.fit_transform(spy_close_data)
X_all, y_all = create_sequences(scaled_spy_close, interval = 60)
# (Removed the no-op X_all.view()/y_all.view() calls — results were discarded.)
n_train = int(len(X_all) * 0.8)
X_train, X_test = X_all[:n_train], X_all[n_train:]
y_train, y_test = y_all[:n_train], y_all[n_train:]
# Reshape to the [samples, timesteps, features] layout Keras LSTMs expect.
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
spy_lstm_model = Sequential()
spy_lstm_model.add(InputLayer(input_shape = (interval, 1)))
spy_lstm_model.add(LSTM(50))
spy_lstm_model.add(Dropout(0.2))
spy_lstm_model.add(Dense(8, "relu"))
spy_lstm_model.add(Dense(1, "linear"))
spy_lstm_model.summary()
# Checkpoint keeps only the best weights seen during training.
spy_cp = ModelCheckpoint("spy_lstm_model/.keras", save_best_only = True)
spy_lstm_model.compile(optimizer = "adam", loss = "mean_squared_error", metrics = [RootMeanSquaredError()])
# NOTE(review): test split doubles as validation data for checkpointing.
spy_lstm_model.fit(X_train, y_train, batch_size=32, epochs = 20, validation_data = (X_test, y_test), callbacks = [spy_cp], verbose = 0)
spy_lstm_model = load_model("spy_lstm_model/.keras")
spy_predictions = spy_lstm_model.predict(X_test)
# Map scaled outputs back to prices.
spy_predictions = scaler.inverse_transform(spy_predictions.reshape(-1, 1))
spy_y_test_actual = scaler.inverse_transform(y_test.reshape(-1, 1))
# Dates aligned with each supervised sample (offset by the 60-day window).
spy_train_dates = spy_close.index[interval:n_train + interval]
spy_test_dates = spy_close.index[n_train + interval:]
fig, ax = plt.subplots(figsize = (8,5))
ax.plot(spy_test_dates, spy_y_test_actual, label = "Actual", color = "green")
ax.plot(spy_test_dates, spy_predictions, label = "Predicted", color = "red")
ax.set_title("SPY LSTM Prediction vs Actual")
ax.set_xlabel("Date")
ax.set_ylabel("Price")
ax.legend()
plt.show()
# Tidy frame of actual vs predicted test values.
test_spy_lstm = pd.DataFrame({
    "Date": spy_test_dates,
    "Actual": spy_y_test_actual.flatten(),
    "Predicted": spy_predictions.flatten()
})
# ===============================
# SWPPX LSTM
# ===============================
# SWPPX: same LSTM pipeline as MSFT/SPY above.
swppx_close_data = swppx_close.values # Taking shape
scaler = MinMaxScaler(feature_range = (0, 1))
scaled_swppx_close = scaler.fit_transform(swppx_close_data)
X_all, y_all = create_sequences(scaled_swppx_close, interval = 60)
# (Removed the no-op X_all.view()/y_all.view() calls — results were discarded.)
n_train = int(len(X_all) * 0.8)
X_train, X_test = X_all[:n_train], X_all[n_train:]
y_train, y_test = y_all[:n_train], y_all[n_train:]
# Reshape to the [samples, timesteps, features] layout Keras LSTMs expect.
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
swppx_lstm_model = Sequential()
swppx_lstm_model.add(InputLayer(input_shape = (interval, 1)))
swppx_lstm_model.add(LSTM(50))
swppx_lstm_model.add(Dropout(0.2))
swppx_lstm_model.add(Dense(8, "relu"))
swppx_lstm_model.add(Dense(1, "linear"))
swppx_lstm_model.summary()
# Checkpoint keeps only the best weights seen during training.
swppx_cp = ModelCheckpoint("swppx_lstm_model/.keras", save_best_only = True)
swppx_lstm_model.compile(optimizer = "adam", loss ="mean_squared_error", metrics = [RootMeanSquaredError()])
# NOTE(review): test split doubles as validation data for checkpointing.
swppx_lstm_model.fit(X_train, y_train, batch_size = 32, epochs = 20, validation_data = (X_test, y_test), callbacks = [swppx_cp], verbose = 0)
swppx_lstm_model = load_model("swppx_lstm_model/.keras")
swppx_predictions = swppx_lstm_model.predict(X_test)
# Map scaled outputs back to prices.
swppx_predictions = scaler.inverse_transform(swppx_predictions.reshape(-1, 1))
swppx_y_test_actual = scaler.inverse_transform(y_test.reshape(-1, 1))
# Dates aligned with each supervised sample (offset by the 60-day window).
swppx_train_dates = swppx_close.index[interval:n_train + interval]
swppx_test_dates = swppx_close.index[n_train + interval:]
fig, ax = plt.subplots(figsize = (8,5))
ax.plot(swppx_test_dates, swppx_y_test_actual, label = "Actual", color = "green")
ax.plot(swppx_test_dates, swppx_predictions, label = "Predicted", color = "red")
ax.set_title("SWPPX LSTM Prediction vs Actual")
ax.set_xlabel("Date")
ax.set_ylabel("Price")
ax.legend()
plt.show()
# Tidy frame of actual vs predicted test values.
test_swppx_lstm = pd.DataFrame({
    "Date": swppx_test_dates,
    "Actual": swppx_y_test_actual.flatten(),
    "Predicted": swppx_predictions.flatten()
})
```
```{python}
#| include: false
#| echo: false
#| warning: false
#| message: false
warnings.simplefilter("ignore")
# ============================
# MSFT Prophet Forecast
# ============================
# Based on research, Prophet requires a dataframe with two columns: ds (datestamp) and y (numeric forecast measurement). Common naming conventions appear to be ds and y so I will keep that consistent.
# Build the two-column ds/y frame Prophet requires.
msft_prophet_data = msft_close.reset_index()
# Ran into an error with the datetime format. Timezone must be removed.
msft_prophet_data["Date"] = pd.to_datetime(msft_prophet_data["Date"]).dt.tz_localize(None)
msft_prophet_data = msft_prophet_data[["Date", "Close"]].rename(columns = {"Date": "ds", "Close": "y"})
msft_prophet_data.head()
# Split to training and test again
# NOTE(review): this is a 90/10 split, while the SWPPX/SPY Prophet models and
# all ARIMA/LSTM models use 80/20 — confirm the difference is intentional.
n = int(len(msft_prophet_data) * 0.9)
train_msft_prophet = msft_prophet_data.iloc[:n]
test_msft_prophet = msft_prophet_data.iloc[n:]
# Setting the model parameters
msft_proph_model = Prophet(
    growth = "linear",
    yearly_seasonality = True,
    weekly_seasonality = False,
    daily_seasonality = False
)
msft_proph_model.fit(train_msft_prophet)
# Dataframe for the forecasting: history plus the test horizon, business days only.
future_msft_values = msft_proph_model.make_future_dataframe(periods = len(test_msft_prophet), freq = "B") # No weekends
forecast_msft = msft_proph_model.predict(future_msft_values)
forecast_msft.head()
# Plotting the Prophet Model
fig, ax = plt.subplots(figsize = (8, 5))
ax.plot(train_msft_prophet["ds"], train_msft_prophet["y"], label = "Train", color = "blue")
ax.plot(test_msft_prophet["ds"], test_msft_prophet["y"], label = "Test", color = "green")
ax.plot(forecast_msft["ds"], forecast_msft["yhat"], label = "Prophet Prediction", color = "red")
ax.set_title("MSFT Prophet Forecast vs Actual")
ax.set_xlabel("Date")
ax.legend()
plt.show()
# Trend/seasonality component breakdown.
fig = msft_proph_model.plot_components(forecast_msft)
fig.suptitle("MSFT Prophet Components", y = 1.02)
plt.show()
# ============================
# SWPPX Prophet Forecast
# ============================
# Same Prophet workflow as MSFT above: build the ds/y frame, split, fit,
# forecast over business days, and plot forecast vs actual plus components.
swppx_prophet_data = swppx_close.reset_index()
# Ran into an error with the datetime format. Timezone must be removed.
swppx_prophet_data["Date"] = pd.to_datetime(swppx_prophet_data["Date"]).dt.tz_localize(None)
swppx_prophet_data = swppx_prophet_data[["Date", "Close"]].rename(columns = {"Date": "ds", "Close": "y"})
swppx_prophet_data.head()
# Split to training and test again (80/20, chronological).
n = int(len(swppx_prophet_data) * 0.8)
train_swppx_prophet = swppx_prophet_data.iloc[:n]
test_swppx_prophet = swppx_prophet_data.iloc[n:]
# Setting the model parameters
swppx_proph_model = Prophet(
    growth = "linear",
    yearly_seasonality = True,
    weekly_seasonality = False,
    daily_seasonality = False
)
swppx_proph_model.fit(train_swppx_prophet)
# Dataframe for the forecasting: history plus the test horizon, business days only.
future_swppx_values = swppx_proph_model.make_future_dataframe(periods = len(test_swppx_prophet), freq = "B") # No weekends
forecast_swppx = swppx_proph_model.predict(future_swppx_values)
forecast_swppx.head()
# Plotting the Prophet Model
fig, ax = plt.subplots(figsize = (8, 5))
ax.plot(train_swppx_prophet["ds"], train_swppx_prophet["y"], label = "Train", color = "blue")
ax.plot(test_swppx_prophet["ds"], test_swppx_prophet["y"], label = "Test", color = "green")
ax.plot(forecast_swppx["ds"], forecast_swppx["yhat"], label = "Prophet Prediction", color = "red")
ax.set_title("SWPPX Prophet Forecast vs Actual")
ax.set_xlabel("Date")
ax.legend()
plt.show()
# Trend/seasonality component breakdown.
fig = swppx_proph_model.plot_components(forecast_swppx)
fig.suptitle("SWPPX Prophet Components", y = 1.02)
plt.show()
# ============================
# SPY Prophet Forecast
# ============================
# Same Prophet workflow as MSFT/SWPPX above.
spy_prophet_data = spy_close.reset_index()
# Ran into an error with the datetime format. Timezone must be removed.
spy_prophet_data["Date"] = pd.to_datetime(spy_prophet_data["Date"]).dt.tz_localize(None)
spy_prophet_data = spy_prophet_data[["Date", "Close"]].rename(columns = {"Date": "ds", "Close": "y"})
spy_prophet_data.head()
# Split to training and test again (80/20, chronological).
n = int(len(spy_prophet_data) * 0.8)
train_spy_prophet = spy_prophet_data.iloc[:n]
test_spy_prophet = spy_prophet_data.iloc[n:]
# Setting the model parameters
spy_proph_model = Prophet(
    growth = "linear",
    yearly_seasonality = True,
    weekly_seasonality = False,
    daily_seasonality = False
)
spy_proph_model.fit(train_spy_prophet)
# Dataframe for the forecasting: history plus the test horizon, business days only.
future_spy_values = spy_proph_model.make_future_dataframe(periods = len(test_spy_prophet), freq = "B") # No weekends
forecast_spy = spy_proph_model.predict(future_spy_values)
forecast_spy.head()
# Plotting the Prophet Model
fig, ax = plt.subplots(figsize = (8, 5))
ax.plot(train_spy_prophet["ds"], train_spy_prophet["y"], label = "Train", color = "blue")
ax.plot(test_spy_prophet["ds"], test_spy_prophet["y"], label = "Test", color = "green")
ax.plot(forecast_spy["ds"], forecast_spy["yhat"], label = "Prophet Prediction", color = "red")
ax.set_title("SPY Prophet Forecast vs Actual")
ax.set_xlabel("Date")
ax.legend()
plt.show()
# Trend/seasonality component breakdown.
fig = spy_proph_model.plot_components(forecast_spy)
fig.suptitle("SPY Prophet Components", y = 1.02)
plt.show()
```
```{python}
# Model Comparison
# Defining a function to check the values. It was necessary to do a merge because of the inconsistent sizes.
def eval_prophet(test_df, forecast_df, lab):
    """Print MSE / MAE / R² (plus mean-normalised variants) for one Prophet fit.

    test_df     : held-out rows in Prophet's ds/y layout
    forecast_df : Prophet predict() output (must contain ds and yhat)
    lab         : ticker label used in the printed line

    The forecast frame spans train + test dates, so an inner merge on the
    timestamp keeps only the rows present in both frames.
    """
    merged = pd.merge(
        test_df[["ds", "y"]],
        forecast_df[["ds", "yhat"]],
        on = "ds",
        how = "inner"
    )
    y_true = merged["y"].values
    y_pred = merged["yhat"].values
    mean_y = np.mean(y_true)
    mse = mean_squared_error(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    # NOTE(review): mean-normalised here and in eval_arima, but eval_lstm
    # divides MSE by the variance — NMSE is not comparable across model types.
    nmse = mse / mean_y
    nmae = mae / mean_y
    # The bare "{x:}" format specs were no-ops; round to 3 decimals to match
    # the summary table in the report.
    print(f"Prophet {lab}: "
          f"MSE: {mse:.3f} | MAE: {mae:.3f} | R²: {r2:.3f} | "
          f"NMSE: {nmse:.3f} | NMAE: {nmae:.3f}")

eval_prophet(test_msft_prophet, forecast_msft, "MSFT")
eval_prophet(test_swppx_prophet, forecast_swppx, "SWPPX")
eval_prophet(test_spy_prophet, forecast_spy, "SPY")
# Function for ARIMA testing
def eval_arima(test_df, forecast_df, lab):
    """Print MSE / MAE / R² (plus mean-normalised variants) for one ARIMA fit.

    test_df     : held-out closing-price DataFrame (Date index)
    forecast_df : predictions as a Series or one-column DataFrame; assumes its
                  index carries the same "Date" name as the test frame — the
                  earlier blocks assign test_*.index to the prediction
    lab         : ticker label used in the printed line
    """
    # The models may store dates differently, so normalise the forecast into a
    # one-column "yhat" frame and merge on Date to guarantee alignment.
    if isinstance(forecast_df, pd.Series):
        forecast_df = forecast_df.to_frame(name = "yhat")
    else:
        forecast_df = forecast_df.rename(columns = {forecast_df.columns[0]: "yhat"})
    merged = pd.merge(
        test_df.reset_index()[["Date", "Close"]],
        forecast_df.reset_index()[["Date", "yhat"]],
        on = "Date",
        how = "inner"
    )
    y_true = merged["Close"].values
    y_pred = merged["yhat"].values
    mean_y = np.mean(y_true)
    mse = mean_squared_error(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    # NOTE(review): mean-normalised like eval_prophet, but eval_lstm divides
    # MSE by the variance — NMSE is not comparable across model types.
    nmse = mse / mean_y
    nmae = mae / mean_y
    # The bare "{x:}" format specs were no-ops; 3 decimals matches the report table.
    print(f"ARIMA {lab}: "
          f"MSE: {mse:.3f} | MAE: {mae:.3f} | R²: {r2:.3f} | "
          f"NMSE: {nmse:.3f} | NMAE: {nmae:.3f}")

eval_arima(test_msft, pred_msft, "MSFT")
eval_arima(test_swppx, pred_swppx, "SWPPX")
eval_arima(test_spy, pred_spy, "SPY")
# LSTM evaluation takes the already-inverse-scaled arrays directly, since the
# LSTM pipeline never produced a dated DataFrame to merge on.
def eval_lstm(y_true, y_pred, lab):
    """Print MSE / MAE / R² (plus normalised variants) for one LSTM fit.

    y_true / y_pred : arrays of actual and predicted prices (any shape; both
                      are flattened to 1-D first)
    lab             : ticker label used in the printed line
    """
    y_true = y_true.flatten()
    y_pred = y_pred.flatten()
    mse = mean_squared_error(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    # NOTE(review): NMSE here is variance-based while eval_prophet/eval_arima
    # divide by the mean — the NMSE columns are not comparable across models.
    nmse = mse / np.var(y_true) # variance-based
    nmae = mae / np.mean(y_true) # mean-based
    # The bare "{x:}" format specs were no-ops; 3 decimals matches the report table.
    print(f"LSTM {lab}: "
          f"MSE: {mse:.3f} | MAE: {mae:.3f} | R²: {r2:.3f} | "
          f"NMSE: {nmse:.3f} | NMAE: {nmae:.3f}")

eval_lstm(y_test_actual, msft_predictions, "MSFT")
eval_lstm(swppx_y_test_actual, swppx_predictions, "SWPPX")
eval_lstm(spy_y_test_actual, spy_predictions, "SPY")
```
| Stock | Model | MSE | MAE | R² | NMSE | NMAE |
|---------|---------|------------|----------|-----------|---------|---------|
| MSFT | Prophet | 4591.333 | 65.959 | -12.971 | 11.021 | 0.158 |
| MSFT | ARIMA | 13585.598 | 105.661 | -2.431 | 37.437 | 0.291 |
| MSFT | LSTM | 63.961 | 6.459 | 0.982 | 0.018 | 0.018 |
| SWPPX | Prophet | 18.707 | 3.719 | -4.200 | 1.693 | 0.337 |
| SWPPX | ARIMA | 8.303 | 2.365 | -1.056 | 0.742 | 0.211 |
| SWPPX | LSTM | 0.033 | 0.132 | 0.992 | 0.008 | 0.012 |
| SPY | Prophet | 21337.151 | 122.298 | -4.163 | 45.381 | 0.260 |
| SPY | ARIMA | 10072.653 | 84.816 | -1.219 | 21.212 | 0.179 |
| SPY | LSTM | 63.426 | 6.268 | 0.986 | 0.014 | 0.013 |
## Discussion
## Conclusion